
***********************************************************
* compare market shares from top builder to those in corelogic
***********************************************************

* Root folder used to build every path in this script; edit before running.
local file_path_in "/tochange/"

use "`file_path_in'/house county/rawHOUSEBUILDERall.dta", clear

* Overwrite nsold with a constant 1 on every observation, so that summing it
* in the collapse below counts the observations in each seller-year cell.
drop nsold
generate nsold = 1

* Reduce the data to one row per seller-year:
*   - means of the listed variables (collapse's default statistic)
*   - sums of the observation counter and of every units* variable
collapse (mean) grossrev rank prev avgprofit closings topbuilder top200 nsale ///
         (sum)  nsold units*,                                                 ///
         by(seller year)

* Side output: one row per seller with its average yearly count, sorted from
* largest to smallest and written to CSV.  preserve/restore keeps the
* seller-year data in memory for the steps that follow.
preserve
	egen avgsold = mean(nsold), by(seller)
	collapse (mean) avgsold, by(seller)
	gsort -avgsold
	outsheet * using "`file_path_in'/builderlistCL.csv", comma replace
restore

* Restrict to 2007 onward.  Stata orders missing values above every number,
* so a bare "year>=2007" would also keep observations with a missing year;
* exclude them explicitly.
keep if year >= 2007 & !missing(year)

* Sanity check: the summed observation counter must equal the collapsed
* nsale value in every seller-year; the script stops here if it does not.
assert nsold == nsale
order seller year nsold closings rank
sort seller year

* Ratio of the counted observations to closings, and its mean within seller.
* (A missing or zero closings value produces a missing ratio.)
generate ratio = nsold / closings if !missing(nsold, closings)
bysort seller: egen mratio = mean(ratio)

** generate rank from corelogic tallies of homes sold
* Within each year, order sellers from most to fewest nsold and number them.
* seller is added as the last sort key so that ties in nsold are broken the
* same way on every run (sort order among ties is otherwise arbitrary).
gsort year -nsold seller
by year: generate rankcl = _n

label var rank   "Builder Mag Rank"
label var rankcl "Corelogic Imputed Rank"

* Binned scatter of the two rankings, limited to ranks below 100 on both
* axes.  binscatter is a user-written command and must be installed.
binscatter rankcl rank if rank < 100 & rankcl < 100, ///
	xtitle("Builder Mag Rank") ytitle("Corelogic Imputed Rank")
graph export "`file_path_in'/reg output/validate_mktshr.eps", replace
graph export "`file_path_in'/reg output/validate_mktshr.png", replace

** fraction of builders in topbuilder that match to corelogic

* Step 1: build the lookup file.  Keep a single (arbitrary) observation for
* each seller-year in the raw builder file, so the merge key is unique on
* the using side, and park it in a temporary file.
tempfile temp
use "`file_path_in'/builder county/rawBUILDERall_FIPS.dta", clear
bysort seller year: keep if _n == 1
save `temp', replace

* Step 2: load the top-builder panel and attach the lookup by seller-year.
use "`file_path_in'/Top Builders/topbuildersovertime.dta", clear
merge m:1 seller year using `temp'

* Among top-builder rows (master only = 1, matched = 3), the share with
* _merge == 3 is the fraction of top builders that match to CL.
tabulate _merge if inlist(_merge, 1, 3)


